Forecast Electric power consumption - Tubas Electrical Company Data 2019¶

Table of Contents¶

  • Data Analysis

Data Analysis¶

Import Libraries¶

In [1]:
import pandas as pd
import math
import numpy as np

# ---------------------------------------
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pylab 
plt.rcParams['figure.figsize']=(17,5)

# ---------------------------------------
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import iplot

import warnings
warnings.filterwarnings("ignore")

Define Functions¶

In [2]:
def line_plot(df, date, I, text, y_tiltle, x_title):
    """Render one column of ``df`` as an interactive Plotly line chart.

    Parameters
    ----------
    df : mapping of column name -> values (indexed with ``df[name]``)
        Source of the plotted data.
    date : str
        Name of the column used for the x-axis.
    I : str
        Name of the column used for the y-axis; also used as the trace name.
    text : str
        Figure title (centred at the top of the figure).
    y_tiltle : str
        Y-axis label. The parameter spelling is kept for existing callers.
    x_title : str
        X-axis label.

    The figure is displayed via ``iplot``; nothing is returned.
    """
    # Centred title, anchored near the top of the figure.
    title_spec = dict(text=text, y=0.9, x=0.5, xanchor='center', yanchor='top')

    trace = go.Scatter(x=df[date], y=df[I], mode='lines', name=I)
    figure_layout = go.Layout(
        title=title_spec,
        xaxis={'title': x_title},
        yaxis={'title': y_tiltle},
        template='plotly_dark',
    )

    iplot(go.Figure(data=trace, layout=figure_layout))
    

# --------------------------------------------------------------------
    
def line_3plot(df, date, columns, text, y_title, x_title):
    """Overlay several columns of ``df`` as line traces on one Plotly figure.

    Parameters
    ----------
    df : mapping of column name -> values (indexed with ``df[name]``)
        Source of the plotted data.
    date : str
        Name of the column shared by every trace as the x-axis.
    columns : iterable of str
        Column names to draw; one line trace per entry, named after the column.
    text : str
        Figure title (centred at the top of the figure).
    y_title : str
        Y-axis label.
    x_title : str
        X-axis label.

    The figure is displayed via ``iplot``; nothing is returned.
    """
    traces = [
        go.Scatter(x=df[date], y=df[name], mode='lines', name=name)
        for name in columns
    ]

    # Centred title, anchored near the top of the figure.
    title_spec = dict(text=text, y=0.9, x=0.5, xanchor='center', yanchor='top')
    figure_layout = go.Layout(
        title=title_spec,
        xaxis={'title': x_title},
        yaxis={'title': y_title},
        template='plotly_dark',
    )

    iplot(go.Figure(data=traces, layout=figure_layout))


# ---------------------------------------
def bar_plot(col1, col2, agg_func, title, xtitle, ytitle, frame=None):
    """Draw a bar chart of ``col2`` aggregated per distinct value of ``col1``.

    Parameters
    ----------
    col1 : str
        Column to group by; its group keys become the bar positions.
    col2 : str
        Column to aggregate; the aggregated values become the bar heights
        and (rounded to 2 decimals) the labels drawn outside each bar.
    agg_func : str or callable
        Aggregation forwarded to ``DataFrame.agg`` (e.g. ``'sum'``).
    title : str
        Figure title (centred at the top of the figure).
    xtitle : str
        X-axis label.
    ytitle : str
        Y-axis label.
    frame : DataFrame, optional
        Data to aggregate. When omitted, the notebook-level ``df`` is used,
        which is what this function always did before the parameter existed.

    The figure is displayed via ``iplot``; nothing is returned.
    """
    source = df if frame is None else frame

    # Aggregate once and reuse the result for x, y and the bar labels.
    grouped = source.groupby(col1).agg({col2: agg_func}).reset_index()

    bars = go.Bar(x=grouped[col1],
                  y=grouped[col2],
                  text=grouped[col2].round(2),
                  textposition='outside')

    layout = go.Layout(title={'text': title,
                              'y': 0.9,
                              'x': 0.5,
                              'xanchor': 'center',
                              'yanchor': 'top'},
                       xaxis=dict(title=xtitle),
                       yaxis=dict(title=ytitle),
                       template='plotly_dark')

    fig = go.Figure(data=bars, layout=layout)
    iplot(fig)

Data Understanding¶

In [3]:
# Load the hourly readings: parse 'Datetime' as timestamps, use it as the
# index, and order the rows by it.
df = (
    pd.read_csv("data/input_data2019.csv", parse_dates=['Datetime'], index_col='Datetime')
    .sort_values('Datetime')
)
df.head()
Out[3]:
I1 I2 I3
Datetime
2019-01-01 00:00:00 141.224344 141.851126 144.712441
2019-01-01 01:00:00 124.928353 122.933446 124.464044
2019-01-01 02:00:00 115.902869 114.001158 114.375987
2019-01-01 03:00:00 108.653699 107.037666 107.096276
2019-01-01 04:00:00 103.072493 99.546536 102.537506
In [4]:
# Derive calendar features from the timestamp index to drive the EDA plots.
# 'date' is a plain-column copy of the index; every other feature is read
# from it through the .dt accessor. Rows with any missing value are dropped.
df = (
    df.assign(date=df.index)
      .assign(
          hour=lambda d: d['date'].dt.hour,
          day_of_week=lambda d: d['date'].dt.dayofweek,
          weekday_name=lambda d: d['date'].dt.day_name(),
          quarter=lambda d: d['date'].dt.quarter,
          month=lambda d: d['date'].dt.month,
          year=lambda d: d['date'].dt.year,
          day_of_year=lambda d: d['date'].dt.dayofyear,
          day_of_month=lambda d: d['date'].dt.day,
      )
      .dropna()
)

Exploratory Data Analysis¶

We first look at the full hourly series, then break consumption down by hour of day, by month, and by day of the week.

In [5]:
# Full hourly series for the three current columns. The x-axis carries
# timestamps from the 'date' column, so it is labelled 'Date'.
line_3plot(df, 'date', ['I1', 'I2', 'I3'], 'Current - Hourly', 'Current', 'Date')

Checking electric power consumption by hour of day¶

In [6]:
# One marker per reading: x is the hour of day, y is the I1 value.
# Markers are coloured by the same I1 value, with a colour bar for scale.
scatter = [go.Scatter(x = df['hour'],
                      y = df['I1'],
                      mode ='markers',
                      marker = dict(color = df['I1'],
                                    showscale = True,
                                    colorscale = 'OrRd',
                                    colorbar = dict(title='I1'),
                                    size = 9,
                                    opacity = 0.55))]

# The x-axis shows df['hour'], so it is labelled as hour of day.
layout = go.Layout(title={'text': "Current consumption by hour of day",
                          'y':0.9,
                          'x':0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis = dict(title = 'Hour of day'),
                   yaxis = dict(title = 'Current'),
                   template = 'plotly_dark')

fig = go.Figure(data = scatter, layout = layout)
iplot(fig)

Checking electric power consumption per month¶

In [7]:
# Monthly totals of I1, one bar per month.
bar_plot(
    col1='month',
    col2='I1',
    agg_func='sum',
    title='Total current consumption per month',
    xtitle='Month',
    ytitle='Current',
)

Checking the hourly trend of electric power consumption every day of the week¶

In [8]:
# Sum of I1 for every (hour of day, weekday) pair: one row per hour,
# one column per weekday. Columns are put in calendar order (Monday first);
# the selection raises KeyError if a weekday is absent from the data.
# reset_index() turns the 'hour' index into a regular column for plotting.
df_pivot = df.pivot_table(index='hour', columns='weekday_name', values='I1',
                          aggfunc='sum')
df_pivot = df_pivot[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                     'Saturday', 'Sunday']].reset_index()
In [9]:
# One line trace per weekday, drawn in calendar order so the legend reads
# Monday through Sunday. Each trace plots that weekday's column of df_pivot
# against the shared 'hour' column.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']

data = [
    go.Scatter(x = df_pivot['hour'],
               y = df_pivot[day],
               mode = 'lines',
               name = day)
    for day in weekday_order
]

layout = go.Layout(title={'text': 'current consumption - hourly trends',
                          'y':0.9,
                          'x':0.5,
                          'xanchor': 'center',
                          'yanchor': 'top'},
                   xaxis = dict(title = 'Hour'),
                   yaxis = dict(title = 'Current'),
                   template = 'plotly_dark')

fig = go.Figure(data = data, layout = layout)
iplot(fig)